#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
@Time    : 2019/4/1 14:06
@Author  : tonychao
@File    : trainModelFromData2.py
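Purpose: read an Application_Record_*.data file, train one time-prediction model per group, and save the models, the scaling parameters and the selection statistics under ../savedModels/.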
"""
import sys,os
import dataFileReader
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.externals import joblib
def OutPut(line):
    """Print *line* on stdout behind this script's log tag and flush at once.

    The flush is explicit so the message leaves the buffer immediately
    rather than waiting for the buffer to fill or the process to end.

    Args:
        line: Text to report; it is concatenated to the tag, so it must be
            a string.
    """
    tagged = "[python 应用时间预测模型]:\t" + line
    print(tagged)
    sys.stdout.flush()

# Directory holding this script (symlinks resolved), kept with a trailing
# separator so later paths can be built by plain string concatenation.
ScriptPath = os.path.dirname(os.path.realpath(sys.argv[0])) + os.sep
OutPut("当前脚本目录：" + ScriptPath)



# Load the training data.
# The path of the record file is the first command-line argument, e.g. a file
# named
#   Application_Record_250a21b717a695b767f40a831f4d4345_tonychaotest.id.App.data
if len(sys.argv) < 2:
    # Without a path there is nothing to train on. Fail with a clear message
    # and a non-zero status instead of the NameError that the first use of
    # dataFilePath would otherwise raise.
    sys.stderr.write("usage: python <script> <path to Application_Record_*.data>\n")
    sys.exit(1)
dataFilePath = sys.argv[1]

count_of_groups, totalTimeList, groupConfList, groupTimeList = dataFileReader.readFile(dataFilePath)

# Name of the model directory: the record file name with its leading
# "Application_Record" replaced by "Application_Model" and its trailing
# ".data" dropped. The slice assumes the file name has exactly that prefix
# and suffix; it is not validated here.
dataFileName = "Application_Model" + os.path.basename(dataFilePath)[len("Application_Record"):-len(".data")]

# Output directory for this application's models:
#   <script dir>/../savedModels/<dataFileName>/
# The empty last element makes the joined string end with a separator.
modelPath = ScriptPath + os.sep.join(["..", "savedModels", dataFileName, ""])
if not os.path.exists(modelPath):
    os.makedirs(modelPath)

# 'w+' truncates an existing scaler.data, so every run starts from an empty file.
scalerFile = open(modelPath + 'scaler.data', 'w+')


from ModelSelection import chooseBestModel,chooseBestModel_enhanced
from ModelSelection import modelTest_LeaveOneOut
from MLModels import getModelList

# Per group: standardize the features (zero mean, unit variance), select and
# train a model, and persist the model together with the scaling parameters.
X_scaler = StandardScaler()
try:
    for i in range(count_of_groups):
        groupConf = np.array(groupConfList[i]).astype('float64')

        # Write this group's per-column mean and standard deviation as one
        # record of scaler.data, before the features are standardized.
        # NOTE(review): str() of a NumPy array wraps long arrays over several
        # lines and elides very long ones - confirm the reader of scaler.data
        # copes with wide feature vectors.
        u = groupConf.mean(axis=0)
        s = groupConf.std(axis=0)
        scalerFile.write(str(u) + " " + str(s) + '\n')

        # fit_transform re-fits the shared scaler on this group's data only.
        groupConf = X_scaler.fit_transform(groupConf)

        # Pick the best candidate via leave-one-out testing, then fit it on
        # the whole group. (chooseBestModel is the non-enhanced selector; it
        # is imported but not used here.)
        model, errList, timeCostInUs = chooseBestModel_enhanced(
            groupConf, groupTimeList[i], getModelList(), modelTest_LeaveOneOut, totalTimeList, k=1)
        model.fit(groupConf, groupTimeList[i])
        OutPut("choose and select model")

        # Replace any model left over from a previous run.
        p = modelPath + 'Model_%d.pkl' % (i)
        if os.path.exists(p):
            os.remove(p)
        joblib.dump(model, p)
        OutPut("saveing modle......")

        if i == 0:
            # Statistics are recorded for the first group only: error mean,
            # error std and the reported time cost, followed by a legend line.
            p2 = modelPath + "Model.statistics"
            with open(p2, 'w+') as statisticsFile:
                statisticsFile.write(str(errList.mean()) + "\t" + str(errList.std()) + "\t" + str(timeCostInUs) + "\n")
                statisticsFile.write("mean -t std -t us -n")
finally:
    # Close (and thereby flush) scaler.data even when selection, training or
    # dumping raises, so the handle is not leaked and written records survive.
    scalerFile.close()

sys.stdout.flush()
# sys.exit rather than the exit() helper, which is only installed by the
# site module and is not guaranteed to exist.
sys.exit(0)
